# -*- coding: utf-8 -*-
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.item_pool_dao import ItemPoolDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from zc_core.dao.item_data_dao import *
from chng.rules import *
from zc_core.spiders.base import BaseSpider


class FullCatSpider(BaseSpider):
    """Spider that requests category data for items of the current batch.

    For every record returned by ``ItemDataDao.get_batch_data_list`` with the
    query ``{"catalog1Id": {"$exists": False}}``, one request is sent to the
    ``getProductCategory`` endpoint and the response is handed to the
    module-level ``parse_item_cat`` parser.

    NOTE(review): ``self.batch_no`` and ``self.error_back`` are not defined in
    this class; they are expected to come from ``BaseSpider`` -- confirm.
    """

    name = "full_cat"
    # Category lookup endpoint; `{}` is filled with the record's `_id`.
    item_url = "http://ec.chng.com.cn/scm-hn-oauth-web/obs/business/product/managerView/getProductCategory?productId={}"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider, create the batch record and the done-filter.

        Args:
            batchNo: Batch identifier forwarded to ``BaseSpider.__init__``.
            *args: Extra positional arguments forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        super(FullCatSpider, self).__init__(*args, batchNo=batchNo, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Intended to avoid collecting the same item twice; note that nothing
        # else in this class consults it.
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield one category request per batch record lacking ``catalog1Id``."""
        # Disabled alternative source (item pool instead of batch data),
        # kept for reference:
        # sku_list = ItemPoolDao().get_item_pool_list(
        #     fields={'_id': 1},
        #     # query={"$or": [{"catalog2Id": {'$exists': False}}, {"catalog1Id": {'$exists': False}}]}
        #     # query={"$or": [{"catalog2Id": None}, {"catalog1Id": None}]}
        # )
        pending = ItemDataDao().get_batch_data_list(
            self.batch_no,
            query={"catalog1Id": {'$exists': False}},
        )
        self.logger.info('任务：%s' % len(pending))
        for record in pending:
            yield self._category_request(record.get("_id"))

    def _category_request(self, sku_id):
        """Build the category-lookup ``Request`` for a single ``sku_id``."""
        target = self.item_url.format(sku_id)
        browser_headers = {
            'Host': 'ec.chng.com.cn',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Accept': 'application/json, text/plain, */*',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4208.400',
            'Referer': 'http://ec.chng.com.cn/newmall',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        # `skuId` is read back from response.meta in parse_item_cat.
        request_meta = {
            'reqType': 'catalog',
            'batchNo': self.batch_no,
            "skuId": sku_id,
        }
        return Request(
            url=target,
            headers=browser_headers,
            meta=request_meta,
            callback=self.parse_item_cat,
            errback=self.error_back,
        )

    def parse_item_cat(self, response):
        """Parse a category response and yield the resulting item, if any.

        Logs an error and yields nothing when the parser returns a falsy
        value.
        """
        sku_id = response.meta.get("skuId")
        # This name resolves to the module-level parse_item_cat function
        # (brought in by a star import, presumably chng.rules), not to this
        # method.
        parsed = parse_item_cat(response)
        if not parsed:
            self.logger.error('无分类: sku=%s' % sku_id)
            return
        self.logger.info('商品分类: sku=%s' % sku_id)
        yield parsed


